########################################################################################################
########################################################################################################
####Appendix: Non-state Atrocities in Capital Cities - Forecasting and ROC Curves (based on Model 4)####
########################################################################################################
########################################################################################################
library(pROC)
library(MASS) 
library(pscl)
library(foreign)
library(stargazer)
library(mvtnorm)
library(plyr)
library(robust)
library(cvTools)
library(boot)

# Set working library
setwd("~/Data/Global Analysis/")
# Read in main data
main.data <- read.dta("full.grid.upd.dta")
###create data with no NAs for forecasting 
#(running with the regular dataset yields identical results as the model automatically drops NA.
#This NA data is for the ROCs)
non.miss.data.oil<-na.omit(main.data[,c("incidentnonstatefull", "lag_Capital", "loglagttime", "lagcivconflagtemp","loglagross_oil_prod", "loglagppp", "lagincidentsumfull", "lagurban", "loglagbdist1", "gid", "loglagcellarea","lagp_polity2", "loglagpop", "year95", "year96","year97","year98","year99","year00","year01","year02","year03","year04","year05","year06","year07","year08","year09", "year")])

###Run Model 4 (silent)
at.zinb.4 <- zeroinfl(incidentnonstatefull ~ lag_Capital + lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                      + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea + year96 + year97+
                        year98 + year99 + year00 + year01 + year02 + year03 + year04 + year05 + year06 + year07 + year08
                      |lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                      + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea, 
                      dist = "negbin", data = non.miss.data.oil)

#Create a string based on data with omitted NAs
incidentnonstatefull <- non.miss.data.oil$incidentnonstatefull
###Predicted probabilities of Model 4
at.zinb.4.hats <- predict(at.zinb.4, type="response")
###Create 95% CIs for ROC curves
ci.at.zinb.4 <- ci.thresholds(roc(incidentnonstatefull, at.zinb.4.hats), conf.level=0.95)
###Plot the ROC curve with the 95% CIs
pdf("ROC4insample.pdf")
at.zinb.4.roc <- plot.roc(incidentnonstatefull, at.zinb.4.hats,
                          main="RoC curve, Model 4 (in sample)",  xlab="False atrocity positives",ylab="True atrocity positives",
                          print.auc=TRUE, print.auc.x=0.5, print.auc.y=0.1, ci=TRUE)
plot(ci.at.zinb.4, length=.01*ifelse(attr(ci.at.zinb.4,"roc")$percent, 100, 1), col=par("fg"))
dev.off()

###Now plot without capital to create a comparable ROC curve
#Run a comparable model without the capital indicator
at.zinb.4b <- zeroinfl(incidentnonstatefull ~ lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                       + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea + year96 + year97+
                         year98 + year99 + year00 + year01 + year02 + year03 + year04 + year05 + year06 + year07 + year08
                       |lagcivconflagtemp + lagincidentsumfull + lagurban + loglagross_oil_prod
                       + loglagppp + loglagbdist1 + lagp_polity2 + loglagpop + loglagttime + loglagcellarea, 
                       dist = "negbin", data = non.miss.data.oil)
###Predicted probabilities of the model without capital city
at.zinb.4b.hats <- predict(at.zinb.4b, type="response")
###Create 95% CIs for this model
ci.at.zinb.4b <- ci.thresholds(roc(incidentnonstatefull, at.zinb.4b.hats), conf.level=0.95)
###Plot the ROC curve of the alternative model with the 95% CIs
pdf("ROC4binsample.pdf")
at.zinb.4b.roc <- plot.roc(incidentnonstatefull, at.zinb.4b.hats,
                           main="RoC curve, Model 4B: no capital (in sample)",  xlab="False atrocity positives",ylab="True atrocity positives",
                           print.auc=TRUE, print.auc.x=0.5, print.auc.y=0.1, ci=TRUE)
plot(ci.at.zinb.4b, length=.01*ifelse(attr(ci.at.zinb.4b,"roc")$percent, 100, 1), col=par("fg"))
dev.off()

###Export both models to LaTex
stargazer(at.zinb.4, at.zinb.4b)
stargazer(at.zinb.4, at.zinb.4b, zero.component = TRUE)
AIC(at.zinb.4, at.zinb.4b)

###Delong et al test
roc.test(at.zinb.4.roc, at.zinb.4b.roc, reuse.auc=FALSE)

###Calucalte prediction error using K-Fold cross-validation 
#(10 folds, trimmed to 0.1, meaning we trim n*0.1 observations from each side of the data, 
#when it is ordered, then calculate the average)
p4.cv <- cvFit(at.zinb.4, data=non.miss.data.oil, y=non.miss.data.oil$incidentnonstatefull, cost=rtmspe, K=10, R=1, costArgs = list(trim = 0.1), seed = 1234)
p4.cv
p4b.cv <- cvFit(at.zinb.4b, data=non.miss.data.oil, y=non.miss.data.oil$incidentnonstatefull, cost=rtmspe, K=10, R=1, costArgs = list(trim = 0.1), seed = 1234)
p4b.cv